Chapter 5 Community composition
5.1 Gut microbiota
5.1.1 Taxonomy overview
5.1.1.1 Phylum level
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")), scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "white"),
strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")Number of MAGs
539
Number of bacteria phyla
13
Number of Archaea phyla
0
Phylum relative abundances
phylum_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum,river, environment) %>%
summarise(relabun=sum(count))phylum_summary %>%
group_by(phylum) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
High_mean=mean(relabun[environment=="high"]*100, na.rm=T),
High_sd=sd(relabun[environment=="high"]*100, na.rm=T),
Low_mean=mean(relabun[environment=="low"]*100, na.rm=T),
Low_sd=sd(relabun[environment=="low"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
High=str_c(round(High_mean,3),"±",round(High_sd,3)),
Low=str_c(round(Low_mean,3),"±",round(Low_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(phylum,Total,High,Low)# A tibble: 13 × 4
phylum Total High Low
<chr> <chr> <chr> <chr>
1 p__Bacteroidota 56.017±16.856 57.802±15.145 54.344±18.655
2 p__Bacillota_A 17.723±6.394 16.389±6.41 18.973±6.322
3 p__Pseudomonadota 11.568±13.498 8.774±7.907 14.187±17.056
4 p__Bacillota 5.475±8.813 6.263±12.015 4.736±4.405
5 p__Verrucomicrobiota 4.116±4.507 4.218±4.909 4.02±4.256
6 p__Desulfobacterota 1.795±1.741 1.747±1.616 1.839±1.902
7 p__Fusobacteriota 1.369±2.181 2.692±2.505 0.128±0.513
8 p__Bacillota_C 0.653±0.926 0.426±0.37 0.865±1.22
9 p__Deferribacterota 0.602±0.769 0.923±0.952 0.3±0.369
10 p__Cyanobacteriota 0.37±0.498 0.36±0.511 0.379±0.503
11 p__Bacillota_B 0.155±0.141 0.191±0.143 0.12±0.134
12 p__Elusimicrobiota 0.126±0.428 0.199±0.59 0.058±0.175
13 p__Chlamydiota 0.033±0.084 0.015±0.039 0.049±0.11
phylum_arrange <- phylum_summary %>%
group_by(phylum) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(phylum) %>%
pull()
phylum_summary %>%
left_join(genome_metadata %>% select(phylum,phylum) %>% unique(),by=join_by(phylum==phylum)) %>%
# left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(phylum %in% phylum_arrange[1:20]) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
geom_jitter(alpha=0.5) +
facet_grid(.~environment)+
theme_minimal() +
labs(y="phylum", x="Relative abundance", color="Phylum")Bacteria phyla in individuals from low altitude
13
Bacteria phyla in individuals from high altitude
13
phylum_summary %>%
group_by(phylum) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
le_mean=mean(relabun[river=="Leitzaran"]*100, na.rm=T),
le_sd=sd(relabun[river=="Leitzaran"]*100, na.rm=T),
ha_mean=mean(relabun[river=="Harpea"]*100, na.rm=T),
ha_sd=sd(relabun[river=="Harpea"]*100, na.rm=T),
er_mean=mean(relabun[river=="Erlan"]*100, na.rm=T),
er_sd=sd(relabun[river=="Erlan"]*100, na.rm=T),
go_mean=mean(relabun[river=="Goizueta"]*100, na.rm=T),
go_sd=sd(relabun[river=="Goizueta"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
Leitzaran=str_c(round(le_mean,3),"±",round(le_sd,3)),
Harpea=str_c(round(ha_mean,3),"±",round(ha_sd,3)),
Erlan=str_c(round(er_mean,3),"±",round(er_sd,3)),
Goizueta=str_c(round(go_mean,3),"±",round(go_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(phylum,Total,Leitzaran,Goizueta,Harpea,Erlan)# A tibble: 13 × 6
phylum Total Leitzaran Goizueta Harpea Erlan
<chr> <chr> <chr> <chr> <chr> <chr>
1 p__Bacteroidota 56.017±16.856 60.117±14.174 49.855±21.212 56.148±20.609 59.249±9.464
2 p__Bacillota_A 17.723±6.394 19.382±7.428 18.654±5.77 16.773±5.991 16.053±7.151
3 p__Pseudomonadota 11.568±13.498 12.81±9.399 15.259±21.823 7.394±8.916 9.981±7.303
4 p__Bacillota 5.475±8.813 3.368±2.906 5.8±5.209 10.8±16.871 2.293±2.479
5 p__Verrucomicrobiota 4.116±4.507 2.429±3.463 5.257±4.586 1.69±1.837 6.431±5.773
6 p__Desulfobacterota 1.795±1.741 0.907±1.033 2.565±2.152 1.919±2.224 1.597±0.966
7 p__Fusobacteriota 1.369±2.181 0.293±0.775 0±0 3.777±2.857 1.743±1.828
8 p__Bacillota_C 0.653±0.926 0.278±0.324 1.322±1.475 0.321±0.194 0.519±0.469
9 p__Deferribacterota 0.602±0.769 0.175±0.305 0.397±0.402 0.684±0.385 1.132±1.257
10 p__Cyanobacteriota 0.37±0.498 0.095±0.215 0.601±0.559 0.264±0.461 0.444±0.569
11 p__Bacillota_B 0.155±0.141 0.146±0.154 0.1±0.121 0.199±0.128 0.184±0.162
12 p__Elusimicrobiota 0.126±0.428 0±0 0.102±0.228 0±0 0.374±0.789
13 p__Chlamydiota 0.033±0.084 0±0 0.088±0.138 0.031±0.054 0±0
5.1.1.2 Family level
Percentange of families in each group
family_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,family, river,environment) %>%
summarise(relabun=sum(count))family_summary %>%
group_by(family) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
High_mean=mean(relabun[environment=="high"]*100, na.rm=T),
High_sd=sd(relabun[environment=="high"]*100, na.rm=T),
Low_mean=mean(relabun[environment=="low"]*100, na.rm=T),
Low_sd=sd(relabun[environment=="low"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
High=str_c(round(High_mean,3),"±",round(High_sd,3)),
Low=str_c(round(Low_mean,3),"±",round(Low_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(family,Total,High,Low)# A tibble: 59 × 4
family Total High Low
<chr> <chr> <chr> <chr>
1 f__Bacteroidaceae 27.472±14.072 26.66±11.936 28.232±16.18
2 f__Rikenellaceae 13.92±7.397 18.443±4.375 9.679±7.206
3 f__Tannerellaceae 7.721±4.561 5.819±2.671 9.504±5.286
4 f__Ruminococcaceae 5.794±4.189 6.941±3.771 4.718±4.391
5 f__Lachnospiraceae 4.729±3.338 3.083±2.906 6.272±3.025
6 f__Enterobacteriaceae 4.336±9.482 2.673±4.547 5.896±12.456
7 f__Akkermansiaceae 3.968±4.368 4.044±4.731 3.897±4.155
8 f__Marinifilaceae 3.739±3.555 2.958±1.959 4.471±4.529
9 f__Aeromonadaceae 3.216±5.116 1.775±2.855 4.567±6.381
10 f__Mycoplasmoidaceae 3.122±8.895 4.384±12.189 1.939±4.062
# ℹ 49 more rows
family_arrange <- family_summary %>%
group_by(family) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(family) %>%
pull()
# Per environment
family_summary %>%
left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
geom_jitter(alpha=0.5) +
facet_grid(.~environment)+
theme_minimal() +
labs(y="Family", x="Relative abundance", color="Phylum")family_summary %>%
group_by(family) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
le_mean=mean(relabun[river=="Leitzaran"]*100, na.rm=T),
le_sd=sd(relabun[river=="Leitzaran"]*100, na.rm=T),
ha_mean=mean(relabun[river=="Harpea"]*100, na.rm=T),
ha_sd=sd(relabun[river=="Harpea"]*100, na.rm=T),
er_mean=mean(relabun[river=="Erlan"]*100, na.rm=T),
er_sd=sd(relabun[river=="Erlan"]*100, na.rm=T),
go_mean=mean(relabun[river=="Goizueta"]*100, na.rm=T),
go_sd=sd(relabun[river=="Goizueta"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
Leitzaran=str_c(round(le_mean,3),"±",round(le_sd,3)),
Harpea=str_c(round(ha_mean,3),"±",round(ha_sd,3)),
Erlan=str_c(round(er_mean,3),"±",round(er_sd,3)),
Goizueta=str_c(round(go_mean,3),"±",round(go_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(family,Total,Leitzaran,Goizueta,Harpea,Erlan)# A tibble: 59 × 6
family Total Leitzaran Goizueta Harpea Erlan
<chr> <chr> <chr> <chr> <chr> <chr>
1 f__Bacteroidaceae 27.472±14.072 34.786±20.422 23.135±10.548 27.17±12.675 26.215±12.113
2 f__Rikenellaceae 13.92±7.397 8.911±9.05 10.277±5.916 17.11±5.122 19.609±3.531
3 f__Tannerellaceae 7.721±4.561 9.923±5.108 9.177±5.705 5.979±2.836 5.679±2.706
4 f__Ruminococcaceae 5.794±4.189 4.235±4.644 5.094±4.428 6.708±3.914 7.146±3.9
5 f__Lachnospiraceae 4.729±3.338 5.815±2.118 6.629±3.671 2.699±1.253 3.419±3.907
6 f__Enterobacteriaceae 4.336±9.482 4.222±2.926 7.198±16.738 2.824±5.913 2.54±3.368
7 f__Akkermansiaceae 3.968±4.368 2.429±3.463 5.04±4.475 1.69±1.837 6.104±5.61
8 f__Marinifilaceae 3.739±3.555 4.765±6.475 4.243±2.624 2.306±1.732 3.528±2.075
9 f__Aeromonadaceae 3.216±5.116 6.023±7.391 3.435±5.664 0.515±0.676 2.878±3.597
10 f__Mycoplasmoidaceae 3.122±8.895 1.267±2.044 2.462±5.207 8.878±17.359 0.452±0.999
# ℹ 49 more rows
5.1.1.3 Genus level
*** Percetange of genera in each group***
genus_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,phylum,genus, environment) %>%
summarise(relabun=sum(count))
# %>%
# filter(genus != "g__") %>%
# mutate(genus= sub("^g__", "", genus))
genus_summary %>%
group_by(genus) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
High_mean=mean(relabun[environment=="high"]*100, na.rm=T),
High_sd=sd(relabun[environment=="high"]*100, na.rm=T),
Low_mean=mean(relabun[environment=="low"]*100, na.rm=T),
Low_sd=sd(relabun[environment=="low"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
High=str_c(round(High_mean,3),"±",round(High_sd,3)),
Low=str_c(round(Low_mean,3),"±",round(Low_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(genus,Total,High,Low) # A tibble: 112 × 4
genus Total High Low
<chr> <chr> <chr> <chr>
1 g__Bacteroides 26.763±13.992 26.077±11.54 27.407±16.32
2 g__Parabacteroides 6.376±3.96 4.975±2.577 7.69±4.622
3 g__Mucinivorans 6.085±4.345 8.178±3.614 4.123±4.132
4 g__Aeromonas 3.216±5.116 1.775±2.855 4.567±6.381
5 g__Mycoplasma_L 2.603±8.972 4.309±12.215 1.003±4.012
6 g__Akkermansia 2.441±3.614 2.187±4.075 2.678±3.241
7 g__Odoribacter 2.389±2.236 1.859±1.22 2.886±2.84
8 g__JADFUS01 2.217±1.317 2.721±1.198 1.744±1.279
9 g__Hafnia 1.923±9.189 0.09±0.147 3.641±12.741
10 g__UBA866 1.817±2.179 1.759±1.596 1.871±2.668
# ℹ 102 more rows
genus_summary_sort <- genus_summary %>%
filter(genus != "g__") %>%
group_by(genus) %>%
summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
arrange(-mean)
genus_summary %>%
filter(genus %in% genus_summary_sort$genus[1:20]) %>%
mutate(genus=factor(genus, levels=rev(genus_summary_sort %>% pull(genus)))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
scale_color_manual(values=phylum_colors) +
geom_jitter(alpha=0.5) +
facet_grid(.~environment)+
theme_minimal() +
theme(axis.text.y = element_text(size=6))+
labs(y="Family", x="Relative abundance", color="Phylum")
Number of MAGs without genera-level annotation
133
| phylum | count_nogene | count_total | percentage |
|---|---|---|---|
| p__Bacillota | 4 | 23 | 17.39130 |
| p__Bacillota_A | 45 | 175 | 25.71429 |
| p__Bacillota_B | 3 | 4 | 75.00000 |
| p__Bacillota_C | 11 | 11 | 100.00000 |
| p__Bacteroidota | 24 | 191 | 12.56545 |
| p__Chlamydiota | 1 | 1 | 100.00000 |
| p__Cyanobacteriota | 7 | 12 | 58.33333 |
| p__Deferribacterota | 2 | 2 | 100.00000 |
| p__Desulfobacterota | 4 | 7 | 57.14286 |
| p__Elusimicrobiota | 1 | 2 | 50.00000 |
| p__Pseudomonadota | 18 | 87 | 20.68966 |
| p__Verrucomicrobiota | 13 | 18 | 72.22222 |
Percentage of MAGs without genus-level annotation
24.67532
Number of MAGs without species-level annotation
# A tibble: 1 × 1
Mag_nospecies
<int>
1 494
| phylum | count_nospecies | count_total | species_annotated | percentage | percentage_species |
|---|---|---|---|---|---|
| p__Bacillota | 22 | 23 | 1 | 95.65217 | 4.347826 |
| p__Bacillota_A | 175 | 175 | 0 | 100.00000 | 0.000000 |
| p__Bacillota_B | 4 | 4 | 0 | 100.00000 | 0.000000 |
| p__Bacillota_C | 11 | 11 | 0 | 100.00000 | 0.000000 |
| p__Bacteroidota | 191 | 191 | 0 | 100.00000 | 0.000000 |
| p__Chlamydiota | 1 | 1 | 0 | 100.00000 | 0.000000 |
| p__Cyanobacteriota | 12 | 12 | 0 | 100.00000 | 0.000000 |
| p__Deferribacterota | 2 | 2 | 0 | 100.00000 | 0.000000 |
| p__Desulfobacterota | 7 | 7 | 0 | 100.00000 | 0.000000 |
| p__Elusimicrobiota | 2 | 2 | 0 | 100.00000 | 0.000000 |
| p__Fusobacteriota | 4 | 6 | 2 | 66.66667 | 33.333333 |
| p__Pseudomonadota | 45 | 87 | 42 | 51.72414 | 48.275862 |
| p__Verrucomicrobiota | 18 | 18 | 0 | 100.00000 | 0.000000 |
Percentage of MAGs without species-level annotation
91.65121
5.2 Skin microbiota
5.2.1 Taxonomy overview
5.2.1.1 Phylum level
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")), scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "white"),
strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")Number of MAGs
43
Number of bacteria phyla
3
Number of Archaea phyla
0
Phylum relative abundances
phylum_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum, environment, river) %>%
summarise(relabun=sum(count))phylum_summary %>%
group_by(phylum) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
High_mean=mean(relabun[environment=="high"]*100, na.rm=T),
High_sd=sd(relabun[environment=="high"]*100, na.rm=T),
Low_mean=mean(relabun[environment=="low"]*100, na.rm=T),
Low_sd=sd(relabun[environment=="low"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
High=str_c(round(High_mean,3),"±",round(High_sd,3)),
Low=str_c(round(Low_mean,6),"±",round(Low_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(phylum,Total,High,Low)# A tibble: 3 × 4
phylum Total High Low
<chr> <chr> <chr> <chr>
1 p__Pseudomonadota 84.888±15.792 88.597±8.902 82.415673±18.92
2 p__Bacteroidota 14.869±15.806 10.797±8.675 17.584327±18.92
3 p__ 0.242±1.198 0.606±1.882 0±0
phylum_arrange <- phylum_summary %>%
group_by(phylum) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(phylum) %>%
pull()
phylum_summary %>%
left_join(genome_metadata %>% select(phylum,phylum) %>% unique(),by=join_by(phylum==phylum)) %>%
# left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(phylum %in% phylum_arrange[1:20]) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
geom_jitter(alpha=0.5) +
facet_grid(.~environment)+
theme_minimal() +
labs(y="phylum", x="Relative abundance", color="Phylum")Bacteria phyla in individuals from low altitude
[1] 2
Bacteria phyla in individuals from high altitude
[1] 3
phylum_summary %>%
group_by(phylum) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
le_mean=mean(relabun[river=="Leitzaran"]*100, na.rm=T),
le_sd=sd(relabun[river=="Leitzaran"]*100, na.rm=T),
ha_mean=mean(relabun[river=="Harpea"]*100, na.rm=T),
ha_sd=sd(relabun[river=="Harpea"]*100, na.rm=T),
er_mean=mean(relabun[river=="Erlan"]*100, na.rm=T),
er_sd=sd(relabun[river=="Erlan"]*100, na.rm=T),
go_mean=mean(relabun[river=="Goizueta"]*100, na.rm=T),
go_sd=sd(relabun[river=="Goizueta"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
Leitzaran=str_c(round(le_mean,6),"±",round(le_sd,3)),
Harpea=str_c(round(ha_mean,6),"±",round(ha_sd,3)),
Erlan=str_c(round(er_mean,6),"±",round(er_sd,3)),
Goizueta=str_c(round(go_mean,6),"±",round(go_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(phylum,Total,Leitzaran,Goizueta,Harpea,Erlan)# A tibble: 3 × 6
phylum Total Leitzaran Goizueta Harpea Erlan
<chr> <chr> <chr> <chr> <chr> <chr>
1 p__Pseudomonadota 84.888±15.792 79.00655±24.425 85.824795±11.744 92.755803±3.456 86.517677±10.227
2 p__Bacteroidota 14.869±15.806 20.99345±24.425 14.175205±11.744 7.244197±3.456 12.573272±10.116
3 p__ 0.242±1.198 0±0 0±0 0±0 0.909051±2.292
5.2.1.2 Class level
Percentange of classes in each group
class_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,class, river,environment) %>%
summarise(relabun=sum(count))class_summary %>%
group_by(class) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
High_mean=mean(relabun[environment=="high"]*100, na.rm=T),
High_sd=sd(relabun[environment=="high"]*100, na.rm=T),
Low_mean=mean(relabun[environment=="low"]*100, na.rm=T),
Low_sd=sd(relabun[environment=="low"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
High=str_c(round(High_mean,3),"±",round(High_sd,3)),
Low=str_c(round(Low_mean,3),"±",round(Low_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(class,Total,High,Low)# A tibble: 4 × 4
class Total High Low
<chr> <chr> <chr> <chr>
1 c__Gammaproteobacteria 83.327±16.587 86.945±9.263 80.915±19.96
2 c__Bacteroidia 14.869±15.806 10.797±8.675 17.584±18.92
3 c__Alphaproteobacteria 1.561±4.13 1.652±2.355 1.5±5.049
4 c__ 0.242±1.198 0.606±1.882 0±0
5.2.1.3 Family level
Percentange of families in each group
family_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,family, river,environment) %>%
summarise(relabun=sum(count))family_summary %>%
group_by(family) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
High_mean=mean(relabun[environment=="high"]*100, na.rm=T),
High_sd=sd(relabun[environment=="high"]*100, na.rm=T),
Low_mean=mean(relabun[environment=="low"]*100, na.rm=T),
Low_sd=sd(relabun[environment=="low"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
High=str_c(round(High_mean,3),"±",round(High_sd,3)),
Low=str_c(round(Low_mean,3),"±",round(Low_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(family,Total,High,Low)# A tibble: 14 × 4
family Total High Low
<chr> <chr> <chr> <chr>
1 f__Methylophilaceae 29.667±26.384 48.878±23.032 16.86±20.25
2 f__Burkholderiaceae_B 27.235±24.427 18.967±12.87 32.747±28.814
3 f__Pseudomonadaceae 17.505±22.204 12.039±14.84 21.149±25.756
4 f__Spirosomaceae 13.217±15.853 7.659±6.105 16.923±19.189
5 f__Moraxellaceae 3.763±9.946 5.429±14.848 2.652±4.779
6 f__Burkholderiaceae 2.468±5.746 0.498±1.071 3.781±7.142
7 f__Burkholderiaceae_A 1.898±6.249 0.153±0.531 3.061±7.928
8 f__Sphingomonadaceae 1.403±4.051 1.256±1.956 1.5±5.049
9 f__Flavobacteriaceae 1.282±4.516 3.138±6.889 0.044±0.188
10 f__Alteromonadaceae 0.671±1.439 0.981±1.532 0.465±1.378
11 f__Weeksellaceae 0.37±2.028 0±0 0.617±2.618
12 f__ 0.242±1.198 0.606±1.882 0±0
13 f__UBA6184 0.158±0.866 0.395±1.37 0±0
14 f__Rhodocyclaceae 0.12±0.462 0±0 0.2±0.59
family_arrange <- family_summary %>%
group_by(family) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(family) %>%
pull()
# Per environment
family_summary %>%
left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
geom_jitter(alpha=0.5) +
facet_grid(.~environment)+
theme_minimal() +
labs(y="Family", x="Relative abundance", color="Phylum")family_summary %>%
group_by(family) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
le_mean=mean(relabun[river=="Leitzaran"]*100, na.rm=T),
le_sd=sd(relabun[river=="Leitzaran"]*100, na.rm=T),
ha_mean=mean(relabun[river=="Harpea"]*100, na.rm=T),
ha_sd=sd(relabun[river=="Harpea"]*100, na.rm=T),
er_mean=mean(relabun[river=="Erlan"]*100, na.rm=T),
er_sd=sd(relabun[river=="Erlan"]*100, na.rm=T),
go_mean=mean(relabun[river=="Goizueta"]*100, na.rm=T),
go_sd=sd(relabun[river=="Goizueta"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
Leitzaran=str_c(round(le_mean,3),"±",round(le_sd,3)),
Harpea=str_c(round(ha_mean,3),"±",round(ha_sd,3)),
Erlan=str_c(round(er_mean,3),"±",round(er_sd,3)),
Goizueta=str_c(round(go_mean,3),"±",round(go_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(family,Total,Leitzaran,Goizueta,Harpea,Erlan)# A tibble: 14 × 6
family Total Leitzaran Goizueta Harpea Erlan
<chr> <chr> <chr> <chr> <chr> <chr>
1 f__Methylophilaceae 29.667±26.384 12.119±9.771 21.601±26.932 38.61±29.001 54.012±19.565
2 f__Burkholderiaceae_B 27.235±24.427 43.95±28.971 21.545±25.349 12.11±8.23 22.396±13.818
3 f__Pseudomonadaceae 17.505±22.204 17.201±21.401 25.097±30.276 24.43±19.778 5.843±6.843
4 f__Spirosomaceae 13.217±15.853 20.993±24.425 12.853±12.192 6.742±4.094 8.117±7.118
5 f__Moraxellaceae 3.763±9.946 2.005±4.134 3.298±5.524 14.552±25.274 0.868±1.166
6 f__Burkholderiaceae 2.468±5.746 0.238±0.715 7.324±8.924 0.795±1.59 0.349±0.803
7 f__Burkholderiaceae_A 1.898±6.249 2.562±6.41 3.559±9.588 0±0 0.23±0.65
8 f__Sphingomonadaceae 1.403±4.051 0±0 3.001±7.007 0±0 1.884±2.159
9 f__Flavobacteriaceae 1.282±4.516 0±0 0.088±0.265 0.503±1.005 4.456±8.258
10 f__Alteromonadaceae 0.671±1.439 0.93±1.884 0±0 2.259±2.212 0.342±0.439
11 f__Weeksellaceae 0.37±2.028 0±0 1.234±3.703 0±0 0±0
12 f__ 0.242±1.198 0±0 0±0 0±0 0.909±2.292
13 f__UBA6184 0.158±0.866 0±0 0±0 0±0 0.593±1.678
14 f__Rhodocyclaceae 0.12±0.462 0±0 0.4±0.806 0±0 0±0
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(x=sample,y=count, fill=family, group=family)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_viridis(discrete = TRUE)+
facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")), scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "white"),
strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
labs(fill="Family",y = "Relative abundance",x="Samples")
Family
# A tibble: 6 × 3
Family p_value p_adjust
<chr> <dbl> <dbl>
1 f__Burkholderiaceae 0.00101 0.00469
2 f__Burkholderiaceae_A 0.00165 0.00577
3 f__Rhodocyclaceae 0.000352 0.00282
4 f__Spirosomaceae 0.0143 0.0333
5 f__UBA6184 0.0143 0.0333
6 f__Weeksellaceae 0.000403 0.00282
5.2.1.4 Genus level
genus_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,phylum,genus, environment) %>%
summarise(relabun=sum(count))
# %>%
# filter(genus != "g__") %>%
# mutate(genus= sub("^g__", "", genus))
genus_summary %>%
group_by(genus) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
High_mean=mean(relabun[environment=="high"]*100, na.rm=T),
High_sd=sd(relabun[environment=="high"]*100, na.rm=T),
Low_mean=mean(relabun[environment=="low"]*100, na.rm=T),
Low_sd=sd(relabun[environment=="low"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,3),"±",round(total_sd,3)),
High=str_c(round(High_mean,3),"±",round(High_sd,3)),
Low=str_c(round(Low_mean,3),"±",round(Low_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(genus,Total,High,Low) %>%
tt()| genus | Total | High | Low |
|---|---|---|---|
| g__Methylotenera | 17.548±22.587 | 37.152±23.413 | 4.479±7.969 |
| g__Pseudomonas_E | 17.505±22.204 | 12.039±14.84 | 21.149±25.756 |
| g__Rhodoferax_C | 15.176±22.446 | 6.546±5.573 | 20.929±27.417 |
| g__Methylophilus | 10.821±17.174 | 10.294±8.855 | 11.172±21.263 |
| g__Arcicella | 7.66±9.217 | 6.484±6.555 | 8.443±10.746 |
| g__JALRIJ01 | 5.558±12.997 | 1.174±2.749 | 8.48±16.145 |
| g__Ideonella | 4.853±8.717 | 8.171±10.782 | 2.641±6.439 |
| g__Acinetobacter | 3.763±9.946 | 5.429±14.848 | 2.652±4.779 |
| g__Paucibacter_A | 3.522±7.505 | 2.982±2.89 | 3.881±9.504 |
| g__JADJBS01 | 2.974±12.106 | 0±0 | 4.956±15.479 |
| g__JAEZVV01 | 1.898±6.249 | 0.153±0.531 | 3.061±7.928 |
| g__Telluria | 1.495±5.285 | 0.233±0.663 | 2.336±6.745 |
| g__Flavobacterium | 1.282±4.516 | 3.138±6.889 | 0.044±0.188 |
| g__Sphingobium | 1.066±3.922 | 0.415±0.626 | 1.5±5.049 |
| g__Undibacterium | 0.973±2.645 | 0.265±0.918 | 1.445±3.287 |
| g__ | 0.77±1.988 | 1.019±2.208 | 0.605±1.841 |
| g__Pararheinheimera | 0.671±1.439 | 0.981±1.532 | 0.465±1.378 |
| g__JAAFJR01 | 0.377±1.888 | 0.943±2.969 | 0±0 |
| g__Chryseobacterium | 0.37±2.028 | 0±0 | 0.617±2.618 |
| g__Novosphingobium | 0.336±1.068 | 0.841±1.594 | 0±0 |
| g__Pseudorhodoferax | 0.334±0.916 | 0.325±0.995 | 0.34±0.889 |
| g__UBA6184 | 0.158±0.866 | 0.395±1.37 | 0±0 |
| g__Zoogloea | 0.12±0.462 | 0±0 | 0.2±0.59 |
genus_summary_sort <- genus_summary %>%
filter(genus != "g__") %>%
group_by(genus) %>%
summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
arrange(-mean)
genus_summary %>%
filter(genus %in% genus_summary_sort$genus[1:20]) %>%
mutate(genus=factor(genus, levels=rev(genus_summary_sort %>% pull(genus)))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
scale_color_manual(values=phylum_colors) +
geom_jitter(alpha=0.5) +
facet_grid(.~environment)+
theme_minimal() +
theme(axis.text.y = element_text(size=6))+
labs(y="Family", x="Relative abundance", color="Phylum")genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(x=sample,y=count, fill=genus, group=genus)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")), scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "white"),
strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
labs(fill="Genera",y = "Relative abundance",x="Samples")Number of MAGs without genera-level annotation
5
| phylum | count_nogene | count_total | percentage |
|---|---|---|---|
| p__ | 2 | 2 | 100.000000 |
| p__Pseudomonadota | 3 | 35 | 8.571429 |
Percentage of MAGs without genus-level annotation
11.62791
Number of MAGs without species-level annotation
# A tibble: 1 × 1
Mag_nospecies
<int>
1 33
| phylum | count_nospecies | count_total | species_annotated | percentage_non_species | percentage_species |
|---|---|---|---|---|---|
| p__ | 2 | 2 | 0 | 100.00000 | 0.00000 |
| p__Bacteroidota | 5 | 6 | 1 | 83.33333 | 16.66667 |
| p__Pseudomonadota | 26 | 35 | 9 | 74.28571 | 25.71429 |
Percentage of MAGs without species-level annotation
76.74419